package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import twitter4j.conf.ConfigurationBuilder;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.StallWarning;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;

import com.lambdaworks.redis.RedisClient;
import com.lambdaworks.redis.RedisConnection;

/**
 * This is a basic example of a Storm topology.
 */
public class TweetTopology
{
  /**
   * A spout that uses the Twitter streaming API to continuously
   * fetch tweets.
   */
  public static class TweetSpout extends BaseRichSpout
  {
    // Twitter API authentication credentials
    String custkey, custsecret;
    String accesstoken, accesssecret;

    // To output tuples from the spout to the next stage bolt
    SpoutOutputCollector collector;

    // Twitter4j - twitter stream to get tweets
    TwitterStream twitterStream;

    // Shared queue for buffering the tweets received from the listener thread
    LinkedBlockingQueue<String> queue = null;

    // Class for listening on the tweet stream - for twitter4j
    private class TweetListener implements StatusListener {

      // Implement the callback function invoked when a tweet arrives
      @Override
      public void onStatus(Status status)
      {
        // add the tweet into the queue buffer
        queue.offer(status.getText());
      }

      @Override
      public void onDeletionNotice(StatusDeletionNotice sdn)
      {
      }

      @Override
      public void onTrackLimitationNotice(int i)
      {
      }

      @Override
      public void onScrubGeo(long l, long l1)
      {
      }

      @Override
      public void onStallWarning(StallWarning warning)
      {
      }

      @Override
      public void onException(Exception e)
      {
        e.printStackTrace();
      }
    }

    /**
     * Constructor for the tweet spout that accepts the credentials.
     */
    public TweetSpout(
        String key,
        String secret,
        String token,
        String tokensecret)
    {
      custkey = key;
      custsecret = secret;
      accesstoken = token;
      accesssecret = tokensecret;
    }

    @Override
    public void open(
        Map                  map,
        TopologyContext      topologyContext,
        SpoutOutputCollector spoutOutputCollector)
    {
      // create the bounded buffer for holding incoming tweets
      queue = new LinkedBlockingQueue<String>(1000);

      // save the output collector for emitting tuples
      collector = spoutOutputCollector;

      // build the config with credentials for twitter4j
      ConfigurationBuilder config =
          new ConfigurationBuilder()
              .setOAuthConsumerKey(custkey)
              .setOAuthConsumerSecret(custsecret)
              .setOAuthAccessToken(accesstoken)
              .setOAuthAccessTokenSecret(accesssecret);

      // create the twitter stream factory with the config
      TwitterStreamFactory fact = new TwitterStreamFactory(config.build());

      // get an instance of the twitter stream
      twitterStream = fact.getInstance();

      // provide the handler for the twitter stream
      twitterStream.addListener(new TweetListener());

      // start the sampling of tweets
      twitterStream.sample();
    }
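    /*
     * Note: Storm invokes nextTuple(), ack() and fail() on the same thread,
     * so nextTuple() must never block. That is why tweets are buffered into
     * the queue on twitter4j's listener thread and only poll()-ed
     * (non-blocking) below, with a short sleep when the queue is empty to
     * avoid busy-waiting.
     */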
    @Override
    public void nextTuple()
    {
      // try to pick a tweet from the buffer
      String ret = queue.poll();

      // if no tweet is available, wait for 50 ms and return
      if (ret == null)
      {
        Utils.sleep(50);
        return;
      }

      // now emit the tweet to the next stage bolt
      collector.emit(new Values(ret));
    }

    @Override
    public void close()
    {
      // shutdown the stream - when we are going to exit
      twitterStream.shutdown();
    }

    /**
     * Component-specific configuration.
     */
    @Override
    public Map<String, Object> getComponentConfiguration()
    {
      // create the component config
      Config ret = new Config();

      // set the parallelism for this spout to be 1
      ret.setMaxTaskParallelism(1);

      return ret;
    }

    @Override
    public void declareOutputFields(
        OutputFieldsDeclarer outputFieldsDeclarer)
    {
      // tell storm the schema of the output tuple for this spout
      // tuple consists of a single column called 'tweet'
      outputFieldsDeclarer.declare(new Fields("tweet"));
    }
  }

  /**
   * A bolt that parses the tweet into words.
   */
  public static class ParseTweetBolt extends BaseRichBolt
  {
    // To output tuples from this bolt to the count bolt
    OutputCollector collector;

    @Override
    public void prepare(
        Map             map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
      // save the output collector for emitting tuples
      collector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple)
    {
      // get the 1st column 'tweet' from the tuple
      String tweet = tuple.getString(0);

      // provide the delimiters for splitting the tweet
      String delims = "[ .,?!]+";

      // now split the tweet into tokens
      String[] tokens = tweet.split(delims);

      // for each token/word, emit it
      for (String token : tokens) {
        collector.emit(new Values(token));
      }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
      // tell storm the schema of the output tuple for this bolt
      // tuple consists of a single column called 'tweet-word'
      declarer.declare(new Fields("tweet-word"));
    }
  }

  /**
   * A bolt that counts the words that it receives.
   */
  static class CountBolt extends BaseRichBolt
  {
    // To output tuples from this bolt to the next stage bolts, if any
    private OutputCollector collector;

    // Map to store the count of the words
    private Map<String, Integer> countMap;

    @Override
    public void prepare(
        Map             map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
      // save the collector for emitting tuples
      collector = outputCollector;

      // create and initialize the map
      countMap = new HashMap<String, Integer>();
    }

    @Override
    public void execute(Tuple tuple)
    {
      // get the word from the 1st column of the incoming tuple
      String word = tuple.getString(0);

      // check if the word is present in the map
      if (countMap.get(word) == null) {

        // not present, add the word with a count of 1
        countMap.put(word, 1);
      } else {

        // already there, hence get the count
        Integer val = countMap.get(word);

        // increment the count and save it back to the map
        countMap.put(word, ++val);
      }

      // emit the word and its updated count
      collector.emit(new Values(word, countMap.get(word)));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer)
    {
      // tell storm the schema of the output tuple for this bolt
      // tuple consists of two columns: 'word' and 'count'
      outputFieldsDeclarer.declare(new Fields("word", "count"));
    }
  }
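  /*
   * Note: CountBolt keeps its counts in a per-task, in-memory map. This is
   * only correct because main() wires it up with fieldsGrouping on
   * "tweet-word", which guarantees that all occurrences of a given word are
   * routed to the same CountBolt task.
   */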
  /**
   * A bolt that publishes the word and count to redis.
   */
  static class ReportBolt extends BaseRichBolt
  {
    // place holder to keep the connection to redis
    transient RedisConnection<String, String> redis;

    @Override
    public void prepare(
        Map             map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
      // instantiate a redis client pointed at the local server
      RedisClient client = new RedisClient("localhost", 6379);

      // initiate the actual connection
      redis = client.connect();
    }

    @Override
    public void execute(Tuple tuple)
    {
      // access the first column 'word'
      String word = tuple.getStringByField("word");

      // access the second column 'count'
      Integer count = tuple.getIntegerByField("count");

      // publish the word count to redis using word as the key
      redis.publish("WordCountTopology", word + "|" + Long.toString(count));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
      // nothing to declare - since it is the final bolt
    }
  }

  public static void main(String[] args) throws Exception
  {
    // create the topology
    TopologyBuilder builder = new TopologyBuilder();

    /*
     * In order to create the spout, you need twitter credentials.
     * If you need to use the Twitter firehose/tweet stream for your idea,
     * create a set of credentials by following the instructions at
     *
     * https://dev.twitter.com/discussions/631
     */

    // now create the tweet spout with the credentials
    TweetSpout tweetSpout = new TweetSpout(
        "[Your customer key]",
        "[Your secret key]",
        "[Your access token]",
        "[Your access secret]"
    );

    // attach the tweet spout to the topology - parallelism of 1
    builder.setSpout("tweet-spout", tweetSpout, 1);

    //*********************************************************************
    // Complete the topology.

    // Part 1: attach the parse tweet bolt, parallelism of 10
    // (a shuffle grouping spreads tweets evenly across the parser tasks)
    builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10)
           .shuffleGrouping("tweet-spout");

    // Part 2: attach the count bolt, parallelism of 15
    // (a fields grouping sends each word to exactly one counter task)
    builder.setBolt("count-bolt", new CountBolt(), 15)
           .fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word"));

    // Part 3: attach the report bolt, parallelism of 1
    // (a global grouping funnels all counts into the single reporter task)
    builder.setBolt("report-bolt", new ReportBolt(), 1)
           .globalGrouping("count-bolt");

    // Submit and run the topology.
    //*********************************************************************

    // create the default config object
    Config conf = new Config();

    // set the config in debugging mode
    conf.setDebug(true);

    if (args != null && args.length > 0) {

      // run it on a live cluster

      // set the number of workers for running all spout and bolt tasks
      conf.setNumWorkers(3);

      // create the topology and submit it with the config
      StormSubmitter.submitTopology(args[0], conf, builder.createTopology());

    } else {

      // run it in a simulated local cluster

      // set the number of threads to run - similar to setting the number
      // of workers in a live cluster
      conf.setMaxTaskParallelism(3);

      // create the local cluster instance
      LocalCluster cluster = new LocalCluster();

      // submit the topology to the local cluster
      cluster.submitTopology("tweet-word-count", conf, builder.createTopology());

      // let the topology run for 30 seconds. note topologies never terminate!
      Utils.sleep(30000);

      // now kill the topology
      cluster.killTopology("tweet-word-count");

      // we are done, so shutdown the local cluster
      cluster.shutdown();
    }
  }
}
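/*
 * A minimal companion sketch, not part of the original exercise: a
 * standalone subscriber that prints the "word|count" messages ReportBolt
 * publishes. The channel name "WordCountTopology" matches the publish()
 * call above; the pub/sub classes are assumed to come from the same
 * lambdaworks (lettuce 2.x) client already imported by this file, with a
 * Redis server on localhost:6379. Equivalently, from a shell:
 *
 *   redis-cli SUBSCRIBE WordCountTopology
 */
class WordCountSubscriber
{
  public static void main(String[] args) throws InterruptedException
  {
    // open a dedicated pub/sub connection - a regular connection cannot
    // issue other commands while it is subscribed
    RedisClient client = new RedisClient("localhost", 6379);
    com.lambdaworks.redis.pubsub.RedisPubSubConnection<String, String> pubsub =
        client.connectPubSub();

    // print every message published on the topology's channel
    pubsub.addListener(
        new com.lambdaworks.redis.pubsub.RedisPubSubAdapter<String, String>() {
          @Override
          public void message(String channel, String message)
          {
            // messages arrive as "word|count", as published by ReportBolt
            System.out.println(channel + ": " + message);
          }
        });

    pubsub.subscribe("WordCountTopology");

    // keep the process alive so messages keep arriving
    Thread.sleep(Long.MAX_VALUE);
  }
}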